
# Scrape the free résumé templates listed on sc.chinaz.com.
import os
from urllib.parse import urljoin

import requests
from lxml import etree
#import os
# Seconds to wait on any single HTTP request before giving up, so a stalled
# connection cannot hang the whole run.
REQUEST_TIMEOUT = 30

# Path separators and characters Windows forbids in file names; scraped
# titles are free text, so these are replaced before touching the disk.
_FILENAME_TABLE = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

# Download links on a template's detail page.
# NOTE(review): assumes every <li> in the "downlist" box wraps one <a>
# pointing at a mirror of the archive - confirm against the live page.
DOWNLOAD_XPATH = '//div[@class="clearfix mt20 downlist"]/ul/li/a/@href'


def archive_filename(title):
    """Return the ``.rar`` file name used to store the template *title*.

    Path separators and Windows-reserved characters in *title* are replaced
    with ``_`` so the result is always a single, valid path component.
    """
    return title.translate(_FILENAME_TABLE) + '.rar'


def fetch(url, headers):
    """GET *url* and return the response.

    Raises:
        requests.RequestException: on connection problems, timeouts, or a
            4xx/5xx status code.
    """
    response = requests.get(url=url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


if __name__ == "__main__":
    url = 'https://sc.chinaz.com/jianli/free.html'
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/105.0.0.0 Safari/537.36'
    }

    # Listing page: one "box" <div> per template.
    response = fetch(url, headers)
    response.encoding = 'utf-8'
    tree = etree.HTML(response.text)
    a_list = tree.xpath('//div[@class="box col3 ws_block"]')
    print(a_list)

    os.makedirs('./moban', exist_ok=True)

    for a in a_list:
        hrefs = a.xpath('./a/@href')
        titles = a.xpath('./a/img/@alt')
        if not hrefs or not titles:
            # Not a template card (no link or no titled thumbnail).
            continue

        # urljoin copes with protocol-relative ("//host/..."), absolute and
        # relative hrefs alike, unlike blindly prefixing "https:".
        detail_url = urljoin(url, hrefs[0])
        file_name = archive_filename(titles[0])

        try:
            detail_tree = etree.HTML(fetch(detail_url, headers).text)
            links = detail_tree.xpath(DOWNLOAD_XPATH)
            if not links:
                print('no download link found, skipped:', detail_url)
                continue
            # Fetch the archive itself, not the detail page again.
            archive = fetch(urljoin(detail_url, links[0]), headers).content
        except requests.RequestException as exc:
            # One broken template must not abort the remaining downloads.
            print('download failed, skipped:', detail_url, exc)
            continue

        with open(os.path.join('moban', file_name), 'wb') as fp:
            fp.write(archive)
        print('完成！')


